; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -mtriple=i386-linux-gnu   -mattr=+sse2 -global-isel -stop-after=irtranslator < %s | FileCheck %s --check-prefix=X86
; RUN: llc -O0 -mtriple=x86_64-linux-gnu              -global-isel -stop-after=irtranslator < %s | FileCheck %s --check-prefix=X64

; External i8 globals used as store targets by test_i8_args_8, which writes its
; first, seventh and eighth arguments into them respectively.
@a1_8bit = external global i8
@a7_8bit = external global i8
@a8_8bit = external global i8

; Takes eight i8 arguments, stores %arg1, %arg7 and %arg8 to globals and
; returns %arg1.
; Expected MIR, as pinned by the checks below:
;  - i386 run: every argument is read from a fixed-stack slot (%fixed-stack.7
;    down to %fixed-stack.0) by an s32 G_LOAD with an s8 memory operand, then
;    narrowed with G_TRUNC to s8.
;  - x86_64 run: the first six arguments are copied from $edi, $esi, $edx,
;    $ecx, $r8d and $r9d and truncated to s8; the last two are loaded from
;    %fixed-stack.1 and %fixed-stack.0.
;  - Both runs return the value in $al.
define i8 @test_i8_args_8(i8 %arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7, i8 %arg8) {
  ; X86-LABEL: name: test_i8_args_8
  ; X86: bb.1.entry:
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.7, align 16)
  ; X86-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32)
  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.6, align 4)
  ; X86-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32)
  ; X86-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s8) from %fixed-stack.5, align 8)
  ; X86-NEXT:   [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD2]](s32)
  ; X86-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
  ; X86-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s8) from %fixed-stack.4, align 4)
  ; X86-NEXT:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD3]](s32)
  ; X86-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
  ; X86-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s8) from %fixed-stack.3, align 16)
  ; X86-NEXT:   [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD4]](s32)
  ; X86-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
  ; X86-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s8) from %fixed-stack.2, align 4)
  ; X86-NEXT:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD5]](s32)
  ; X86-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
  ; X86-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s8) from %fixed-stack.1, align 8)
  ; X86-NEXT:   [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD6]](s32)
  ; X86-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s8) from %fixed-stack.0, align 4)
  ; X86-NEXT:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD7]](s32)
  ; X86-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_8bit
  ; X86-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_8bit
  ; X86-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_8bit
  ; X86-NEXT:   G_STORE [[TRUNC]](s8), [[GV]](p0) :: (store (s8) into @a1_8bit)
  ; X86-NEXT:   G_STORE [[TRUNC6]](s8), [[GV1]](p0) :: (store (s8) into @a7_8bit)
  ; X86-NEXT:   G_STORE [[TRUNC7]](s8), [[GV2]](p0) :: (store (s8) into @a8_8bit)
  ; X86-NEXT:   $al = COPY [[TRUNC]](s8)
  ; X86-NEXT:   RET 0, implicit $al
  ; X64-LABEL: name: test_i8_args_8
  ; X64: bb.1.entry:
  ; X64-NEXT:   liveins: $ecx, $edi, $edx, $esi, $r8d, $r9d
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
  ; X64-NEXT:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
  ; X64-NEXT:   [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
  ; X64-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $edx
  ; X64-NEXT:   [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
  ; X64-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $ecx
  ; X64-NEXT:   [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[COPY3]](s32)
  ; X64-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $r8d
  ; X64-NEXT:   [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[COPY4]](s32)
  ; X64-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $r9d
  ; X64-NEXT:   [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[COPY5]](s32)
  ; X64-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s8) from %fixed-stack.1, align 16)
  ; X64-NEXT:   [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD]](s32)
  ; X64-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s8) from %fixed-stack.0, align 8)
  ; X64-NEXT:   [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[LOAD1]](s32)
  ; X64-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_8bit
  ; X64-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_8bit
  ; X64-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_8bit
  ; X64-NEXT:   G_STORE [[TRUNC]](s8), [[GV]](p0) :: (store (s8) into @a1_8bit)
  ; X64-NEXT:   G_STORE [[TRUNC6]](s8), [[GV1]](p0) :: (store (s8) into @a7_8bit)
  ; X64-NEXT:   G_STORE [[TRUNC7]](s8), [[GV2]](p0) :: (store (s8) into @a8_8bit)
  ; X64-NEXT:   $al = COPY [[TRUNC]](s8)
  ; X64-NEXT:   RET 0, implicit $al
entry:
  ; Only the first, seventh and eighth arguments are used; the others exist to
  ; fill the remaining argument positions.
  store i8 %arg1, ptr @a1_8bit
  store i8 %arg7, ptr @a7_8bit
  store i8 %arg8, ptr @a8_8bit
  ret i8 %arg1
}

; External i32 globals used as store targets by test_i32_args_8, which writes
; its first, seventh and eighth arguments into them respectively.
@a1_32bit = external global i32
@a7_32bit = external global i32
@a8_32bit = external global i32

; Takes eight i32 arguments, stores %arg1, %arg7 and %arg8 to globals and
; returns %arg1.
; Expected MIR, as pinned by the checks below:
;  - i386 run: every argument is an s32 G_LOAD from a fixed-stack slot
;    (%fixed-stack.7 down to %fixed-stack.0); no truncation is needed.
;  - x86_64 run: the first six arguments are copied from $edi, $esi, $edx,
;    $ecx, $r8d and $r9d; the last two are loaded from %fixed-stack.1 and
;    %fixed-stack.0.
;  - Both runs return the value in $eax.
define i32 @test_i32_args_8(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8) {
  ; X86-LABEL: name: test_i32_args_8
  ; X86: bb.1.entry:
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.7, align 16)
  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.6)
  ; X86-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s32) from %fixed-stack.5, align 8)
  ; X86-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
  ; X86-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s32) from %fixed-stack.4)
  ; X86-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
  ; X86-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s32) from %fixed-stack.3, align 16)
  ; X86-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
  ; X86-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s32) from %fixed-stack.2)
  ; X86-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
  ; X86-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s32) from %fixed-stack.1, align 8)
  ; X86-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s32) from %fixed-stack.0)
  ; X86-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_32bit
  ; X86-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_32bit
  ; X86-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_32bit
  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[GV]](p0) :: (store (s32) into @a1_32bit)
  ; X86-NEXT:   G_STORE [[LOAD6]](s32), [[GV1]](p0) :: (store (s32) into @a7_32bit)
  ; X86-NEXT:   G_STORE [[LOAD7]](s32), [[GV2]](p0) :: (store (s32) into @a8_32bit)
  ; X86-NEXT:   $eax = COPY [[LOAD]](s32)
  ; X86-NEXT:   RET 0, implicit $eax
  ; X64-LABEL: name: test_i32_args_8
  ; X64: bb.1.entry:
  ; X64-NEXT:   liveins: $ecx, $edi, $edx, $esi, $r8d, $r9d
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
  ; X64-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $edx
  ; X64-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $ecx
  ; X64-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $r8d
  ; X64-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $r9d
  ; X64-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16)
  ; X64-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0, align 8)
  ; X64-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_32bit
  ; X64-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_32bit
  ; X64-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_32bit
  ; X64-NEXT:   G_STORE [[COPY]](s32), [[GV]](p0) :: (store (s32) into @a1_32bit)
  ; X64-NEXT:   G_STORE [[LOAD]](s32), [[GV1]](p0) :: (store (s32) into @a7_32bit)
  ; X64-NEXT:   G_STORE [[LOAD1]](s32), [[GV2]](p0) :: (store (s32) into @a8_32bit)
  ; X64-NEXT:   $eax = COPY [[COPY]](s32)
  ; X64-NEXT:   RET 0, implicit $eax
entry:
  ; Only the first, seventh and eighth arguments are used; the others exist to
  ; fill the remaining argument positions.
  store i32 %arg1, ptr @a1_32bit
  store i32 %arg7, ptr @a7_32bit
  store i32 %arg8, ptr @a8_32bit
  ret i32 %arg1
}

; External i64 globals used as store targets by test_i64_args_8, which writes
; its first, seventh and eighth arguments into them respectively.
@a1_64bit = external global i64
@a7_64bit = external global i64
@a8_64bit = external global i64

; ... a bunch more that we don't track ...
; Takes eight i64 arguments, stores %arg1, %arg7 and %arg8 to globals and
; returns %arg1.
; Expected MIR, as pinned by the checks below:
;  - i386 run: each i64 argument is rebuilt with G_MERGE_VALUES from two s32
;    G_LOADs of consecutive fixed-stack slots (%fixed-stack.15 down to
;    %fixed-stack.0). The s64 stores to the globals carry "align 4". The return
;    value is split with G_UNMERGE_VALUES and returned in $eax and $edx.
;  - x86_64 run: the first six arguments are copied from $rdi, $rsi, $rdx,
;    $rcx, $r8 and $r9; the last two are s64 loads from %fixed-stack.1 and
;    %fixed-stack.0. The value is returned in $rax.
define i64 @test_i64_args_8(i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8) {
  ; X86-LABEL: name: test_i64_args_8
  ; X86: bb.1.entry:
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.15
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.15, align 16)
  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.14
  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.14)
  ; X86-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
  ; X86-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.13
  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (invariant load (s32) from %fixed-stack.13, align 8)
  ; X86-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.12
  ; X86-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (invariant load (s32) from %fixed-stack.12)
  ; X86-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
  ; X86-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.11
  ; X86-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (invariant load (s32) from %fixed-stack.11, align 16)
  ; X86-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.10
  ; X86-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (invariant load (s32) from %fixed-stack.10)
  ; X86-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
  ; X86-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.9
  ; X86-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (invariant load (s32) from %fixed-stack.9, align 8)
  ; X86-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.8
  ; X86-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (invariant load (s32) from %fixed-stack.8)
  ; X86-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
  ; X86-NEXT:   [[FRAME_INDEX8:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
  ; X86-NEXT:   [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p0) :: (invariant load (s32) from %fixed-stack.7, align 16)
  ; X86-NEXT:   [[FRAME_INDEX9:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
  ; X86-NEXT:   [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX9]](p0) :: (invariant load (s32) from %fixed-stack.6)
  ; X86-NEXT:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD8]](s32), [[LOAD9]](s32)
  ; X86-NEXT:   [[FRAME_INDEX10:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
  ; X86-NEXT:   [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX10]](p0) :: (invariant load (s32) from %fixed-stack.5, align 8)
  ; X86-NEXT:   [[FRAME_INDEX11:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
  ; X86-NEXT:   [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX11]](p0) :: (invariant load (s32) from %fixed-stack.4)
  ; X86-NEXT:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD10]](s32), [[LOAD11]](s32)
  ; X86-NEXT:   [[FRAME_INDEX12:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
  ; X86-NEXT:   [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX12]](p0) :: (invariant load (s32) from %fixed-stack.3, align 16)
  ; X86-NEXT:   [[FRAME_INDEX13:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
  ; X86-NEXT:   [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX13]](p0) :: (invariant load (s32) from %fixed-stack.2)
  ; X86-NEXT:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD12]](s32), [[LOAD13]](s32)
  ; X86-NEXT:   [[FRAME_INDEX14:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
  ; X86-NEXT:   [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX14]](p0) :: (invariant load (s32) from %fixed-stack.1, align 8)
  ; X86-NEXT:   [[FRAME_INDEX15:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX15]](p0) :: (invariant load (s32) from %fixed-stack.0)
  ; X86-NEXT:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD14]](s32), [[LOAD15]](s32)
  ; X86-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_64bit
  ; X86-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_64bit
  ; X86-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_64bit
  ; X86-NEXT:   G_STORE [[MV]](s64), [[GV]](p0) :: (store (s64) into @a1_64bit, align 4)
  ; X86-NEXT:   G_STORE [[MV6]](s64), [[GV1]](p0) :: (store (s64) into @a7_64bit, align 4)
  ; X86-NEXT:   G_STORE [[MV7]](s64), [[GV2]](p0) :: (store (s64) into @a8_64bit, align 4)
  ; X86-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
  ; X86-NEXT:   $eax = COPY [[UV]](s32)
  ; X86-NEXT:   $edx = COPY [[UV1]](s32)
  ; X86-NEXT:   RET 0, implicit $eax, implicit $edx
  ; X64-LABEL: name: test_i64_args_8
  ; X64: bb.1.entry:
  ; X64-NEXT:   liveins: $rcx, $rdi, $rdx, $rsi, $r8, $r9
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $rdi
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $rsi
  ; X64-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $rdx
  ; X64-NEXT:   [[COPY3:%[0-9]+]]:_(s64) = COPY $rcx
  ; X64-NEXT:   [[COPY4:%[0-9]+]]:_(s64) = COPY $r8
  ; X64-NEXT:   [[COPY5:%[0-9]+]]:_(s64) = COPY $r9
  ; X64-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16)
  ; X64-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.0)
  ; X64-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a1_64bit
  ; X64-NEXT:   [[GV1:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a7_64bit
  ; X64-NEXT:   [[GV2:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @a8_64bit
  ; X64-NEXT:   G_STORE [[COPY]](s64), [[GV]](p0) :: (store (s64) into @a1_64bit)
  ; X64-NEXT:   G_STORE [[LOAD]](s64), [[GV1]](p0) :: (store (s64) into @a7_64bit)
  ; X64-NEXT:   G_STORE [[LOAD1]](s64), [[GV2]](p0) :: (store (s64) into @a8_64bit)
  ; X64-NEXT:   $rax = COPY [[COPY]](s64)
  ; X64-NEXT:   RET 0, implicit $rax
entry:
  ; Only the first, seventh and eighth arguments are used; the others exist to
  ; fill the remaining argument positions.
  store i64 %arg1, ptr @a1_64bit
  store i64 %arg7, ptr @a7_64bit
  store i64 %arg8, ptr @a8_64bit
  ret i64 %arg1
}

; Takes two float arguments and returns the second one.
; Expected MIR, as pinned by the checks below:
;  - i386 run: both arguments are s32 loads from %fixed-stack.1 and
;    %fixed-stack.0; the second is returned in $fp0.
;  - x86_64 run: the arguments are copied from $xmm0 and $xmm1; the second is
;    returned in $xmm0.
define float @test_float_args(float %arg1, float %arg2) {
  ; X86-LABEL: name: test_float_args
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16)
  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0)
  ; X86-NEXT:   $fp0 = COPY [[LOAD1]](s32)
  ; X86-NEXT:   RET 0, implicit $fp0
  ; X64-LABEL: name: test_float_args
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $xmm0, $xmm1
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $xmm0
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $xmm1
  ; X64-NEXT:   $xmm0 = COPY [[COPY1]](s32)
  ; X64-NEXT:   RET 0, implicit $xmm0
  ret float %arg2
}

; Takes two double arguments and returns the second one.
; Expected MIR, as pinned by the checks below:
;  - i386 run: both arguments are s64 loads from %fixed-stack.1 and
;    %fixed-stack.0; the second is returned in $fp0.
;  - x86_64 run: the arguments are copied from $xmm0 and $xmm1; the second is
;    returned in $xmm0.
define double @test_double_args(double %arg1, double %arg2) {
  ; X86-LABEL: name: test_double_args
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s64) from %fixed-stack.1, align 16)
  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s64) from %fixed-stack.0)
  ; X86-NEXT:   $fp0 = COPY [[LOAD1]](s64)
  ; X86-NEXT:   RET 0, implicit $fp0
  ; X64-LABEL: name: test_double_args
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $xmm0, $xmm1
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $xmm0
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $xmm1
  ; X64-NEXT:   $xmm0 = COPY [[COPY1]](s64)
  ; X64-NEXT:   RET 0, implicit $xmm0
  ret double %arg2
}

; Takes two <4 x i32> arguments and returns the second one.
; The checks expect the same MIR for both runs: the arguments are copied from
; $xmm0 and $xmm1 as <4 x s32>, and the second is returned in $xmm0.
define <4 x i32> @test_v4i32_args(<4 x i32> %arg1, <4 x i32> %arg2) {
  ; X86-LABEL: name: test_v4i32_args
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   liveins: $xmm0, $xmm1
  ; X86-NEXT: {{  $}}
  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
  ; X86-NEXT:   $xmm0 = COPY [[COPY1]](<4 x s32>)
  ; X86-NEXT:   RET 0, implicit $xmm0
  ; X64-LABEL: name: test_v4i32_args
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $xmm0, $xmm1
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
  ; X64-NEXT:   $xmm0 = COPY [[COPY1]](<4 x s32>)
  ; X64-NEXT:   RET 0, implicit $xmm0
  ret <4 x i32> %arg2
}

; Takes a single <8 x i32> argument and returns it unchanged.
; The checks expect the same MIR for both runs: the argument arrives as two
; <4 x s32> halves in $xmm0 and $xmm1, which are joined with G_CONCAT_VECTORS;
; the return value is split again with G_UNMERGE_VALUES and returned in $xmm0
; and $xmm1.
define <8 x i32> @test_v8i32_args(<8 x i32> %arg1) {
  ; X86-LABEL: name: test_v8i32_args
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   liveins: $xmm0, $xmm1
  ; X86-NEXT: {{  $}}
  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
  ; X86-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
  ; X86-NEXT:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
  ; X86-NEXT:   $xmm0 = COPY [[UV]](<4 x s32>)
  ; X86-NEXT:   $xmm1 = COPY [[UV1]](<4 x s32>)
  ; X86-NEXT:   RET 0, implicit $xmm0, implicit $xmm1
  ; X64-LABEL: name: test_v8i32_args
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $xmm0, $xmm1
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
  ; X64-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
  ; X64-NEXT:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s32>)
  ; X64-NEXT:   $xmm0 = COPY [[UV]](<4 x s32>)
  ; X64-NEXT:   $xmm1 = COPY [[UV1]](<4 x s32>)
  ; X64-NEXT:   RET 0, implicit $xmm0, implicit $xmm1
  ret <8 x i32> %arg1
}

; Function with no arguments and a void return.
; The checks expect a single "RET 0" with no implicit register operands on
; both runs.
define void @test_void_return() {
  ; X86-LABEL: name: test_void_return
  ; X86: bb.1.entry:
  ; X86-NEXT:   RET 0
  ; X64-LABEL: name: test_void_return
  ; X64: bb.1.entry:
  ; X64-NEXT:   RET 0
entry:
  ret void
}

; Takes a pointer argument and returns it unchanged. Despite the name, the body
; performs no load or store through %p1.
; Expected MIR, as pinned by the checks below:
;  - i386 run: the pointer is a p0 load from %fixed-stack.0, returned in $eax.
;  - x86_64 run: the pointer is copied from $rdi and returned in $rax.
define ptr @test_memop_i32(ptr %p1) {
  ; X86-LABEL: name: test_memop_i32
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16)
  ; X86-NEXT:   $eax = COPY [[LOAD]](p0)
  ; X86-NEXT:   RET 0, implicit $eax
  ; X64-LABEL: name: test_memop_i32
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $rdi
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
  ; X64-NEXT:   $rax = COPY [[COPY]](p0)
  ; X64-NEXT:   RET 0, implicit $rax
  ret ptr %p1;
}

; External callee with no arguments and no return value.
declare void @trivial_callee()
; Calls trivial_callee and returns void.
; The checks expect, on both runs, a call-frame setup/teardown pair with size 0
; around a direct call (CALLpcrel32 on i386, CALL64pcrel32 on x86_64), with no
; argument or return-value registers attached to the call.
define void @test_trivial_call() {
  ; X86-LABEL: name: test_trivial_call
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   CALLpcrel32 @trivial_callee, csr_32, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   RET 0
  ; X64-LABEL: name: test_trivial_call
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   CALL64pcrel32 @trivial_callee, csr_64, implicit $rsp, implicit $ssp
  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   RET 0
  call void @trivial_callee()
  ret void
}

; External callee taking two i32 arguments.
declare void @simple_arg_callee(i32 %in0, i32 %in1)
; Forwards its two i32 arguments to simple_arg_callee in swapped order
; (%in1 first, then %in0).
; Expected MIR, as pinned by the checks below:
;  - i386 run: the incoming arguments are loaded from %fixed-stack.1 (%in0) and
;    %fixed-stack.0 (%in1). An 8-byte call frame is set up; %in1 is stored at
;    $esp + 0 and %in0 at $esp + 4 before the call.
;  - x86_64 run: the incoming arguments are copied from $edi (%in0) and $esi
;    (%in1); for the call $edi receives %in1 and $esi receives %in0, and the
;    call frame size is 0.
define void @test_simple_arg(i32 %in0, i32 %in1) {
  ; X86-LABEL: name: test_simple_arg
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.1, align 16)
  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (s32) from %fixed-stack.0)
  ; X86-NEXT:   ADJCALLSTACKDOWN32 8, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
  ; X86-NEXT:   G_STORE [[LOAD1]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; X86-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1)
  ; X86-NEXT:   CALLpcrel32 @simple_arg_callee, csr_32, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKUP32 8, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   RET 0
  ; X64-LABEL: name: test_simple_arg
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $edi, $esi
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $esi
  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   $edi = COPY [[COPY1]](s32)
  ; X64-NEXT:   $esi = COPY [[COPY]](s32)
  ; X64-NEXT:   CALL64pcrel32 @simple_arg_callee, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi
  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   RET 0
  ; Note the swapped argument order relative to this function's own parameters.
  call void @simple_arg_callee(i32 %in1, i32 %in0)
  ret void
}

; External callee taking eight i32 arguments.
declare void @simple_arg8_callee(i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7, i32 %arg8)
; Calls simple_arg8_callee, passing its single i32 argument %in0 in all eight
; argument positions.
; Expected MIR, as pinned by the checks below:
;  - i386 run: %in0 is loaded from %fixed-stack.0; a 32-byte call frame is set
;    up and the value is stored eight times, at $esp + 0, 4, ..., 28.
;  - x86_64 run: %in0 is copied from $edi; a 16-byte call frame is set up, the
;    value is stored at $rsp + 0 and $rsp + 8, and it is also copied into $edi,
;    $esi, $edx, $ecx, $r8d and $r9d for the call.
define void @test_simple_arg8_call(i32 %in0) {
  ; X86-LABEL: name: test_simple_arg8_call
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16)
  ; X86-NEXT:   ADJCALLSTACKDOWN32 32, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; X86-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1)
  ; X86-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
  ; X86-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C2]](s32)
  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 1)
  ; X86-NEXT:   [[COPY3:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
  ; X86-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY3]], [[C3]](s32)
  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12, align 1)
  ; X86-NEXT:   [[COPY4:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
  ; X86-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY4]], [[C4]](s32)
  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 1)
  ; X86-NEXT:   [[COPY5:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
  ; X86-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY5]], [[C5]](s32)
  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20, align 1)
  ; X86-NEXT:   [[COPY6:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
  ; X86-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY6]], [[C6]](s32)
  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 1)
  ; X86-NEXT:   [[COPY7:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
  ; X86-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY7]], [[C7]](s32)
  ; X86-NEXT:   G_STORE [[LOAD]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28, align 1)
  ; X86-NEXT:   CALLpcrel32 @simple_arg8_callee, csr_32, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKUP32 32, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   RET 0
  ; X64-LABEL: name: test_simple_arg8_call
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $edi
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $edi
  ; X64-NEXT:   ADJCALLSTACKDOWN64 16, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $rsp
  ; X64-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
  ; X64-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64)
  ; X64-NEXT:   G_STORE [[COPY]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
  ; X64-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $rsp
  ; X64-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
  ; X64-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C1]](s64)
  ; X64-NEXT:   G_STORE [[COPY]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 8, align 1)
  ; X64-NEXT:   $edi = COPY [[COPY]](s32)
  ; X64-NEXT:   $esi = COPY [[COPY]](s32)
  ; X64-NEXT:   $edx = COPY [[COPY]](s32)
  ; X64-NEXT:   $ecx = COPY [[COPY]](s32)
  ; X64-NEXT:   $r8d = COPY [[COPY]](s32)
  ; X64-NEXT:   $r9d = COPY [[COPY]](s32)
  ; X64-NEXT:   CALL64pcrel32 @simple_arg8_callee, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit $edx, implicit $ecx, implicit $r8d, implicit $r9d
  ; X64-NEXT:   ADJCALLSTACKUP64 16, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   RET 0
  call void @simple_arg8_callee(i32 %in0, i32 %in0, i32 %in0, i32 %in0,i32 %in0, i32 %in0, i32 %in0, i32 %in0)
  ret void
}

; External callee taking one i32 argument and returning an i32.
declare i32 @simple_return_callee(i32 %in0)
; Calls simple_return_callee with the constant 5, adds the result to itself and
; returns the sum.
; Expected MIR, as pinned by the checks below:
;  - i386 run: a 4-byte call frame is set up and the constant is stored at
;    $esp + 0 before the call.
;  - x86_64 run: the constant is passed in $edi and the call frame size is 0.
;  - Both runs copy the call result out of $eax before the call-frame teardown,
;    G_ADD it with itself and return the sum in $eax.
define i32 @test_simple_return_callee() {
  ; X86-LABEL: name: test_simple_return_callee
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; X86-NEXT:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32)
  ; X86-NEXT:   G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 1)
  ; X86-NEXT:   CALLpcrel32 @simple_return_callee, csr_32, implicit $esp, implicit $ssp, implicit-def $eax
  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $eax
  ; X86-NEXT:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[COPY1]]
  ; X86-NEXT:   $eax = COPY [[ADD]](s32)
  ; X86-NEXT:   RET 0, implicit $eax
  ; X64-LABEL: name: test_simple_return_callee
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   $edi = COPY [[C]](s32)
  ; X64-NEXT:   CALL64pcrel32 @simple_return_callee, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $eax
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $eax
  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY]]
  ; X64-NEXT:   $eax = COPY [[ADD]](s32)
  ; X64-NEXT:   RET 0, implicit $eax
  %call = call i32 @simple_return_callee(i32 5)
  ; Using the result twice gives the returned value a use other than the ret.
  %r = add i32 %call, %call
  ret i32 %r
}

; Checks a call whose <8 x i32> argument and return value are each split into
; two <4 x s32> halves. The function's own two arguments arrive as four
; halves: $xmm0-$xmm2 plus one fixed-stack load on i386, and $xmm0-$xmm3 on
; x86-64. Halves are rejoined with G_CONCAT_VECTORS, and split again with
; G_UNMERGE_VALUES before being copied into $xmm0/$xmm1 for the outgoing call
; and for the final return.
declare <8 x i32> @split_return_callee(<8 x i32> %in0)
define <8 x i32> @test_split_return_callee(<8 x i32> %arg1, <8 x i32> %arg2) {
  ; X86-LABEL: name: test_split_return_callee
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   liveins: $xmm0, $xmm1, $xmm2
  ; X86-NEXT: {{  $}}
  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
  ; X86-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
  ; X86-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $xmm2
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (<4 x s32>) from %fixed-stack.0)
  ; X86-NEXT:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<4 x s32>), [[LOAD]](<4 x s32>)
  ; X86-NEXT:   ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<8 x s32>)
  ; X86-NEXT:   $xmm0 = COPY [[UV]](<4 x s32>)
  ; X86-NEXT:   $xmm1 = COPY [[UV1]](<4 x s32>)
  ; X86-NEXT:   CALLpcrel32 @split_return_callee, csr_32, implicit $esp, implicit $ssp, implicit $xmm0, implicit $xmm1, implicit-def $xmm0, implicit-def $xmm1
  ; X86-NEXT:   [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
  ; X86-NEXT:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
  ; X86-NEXT:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY3]](<4 x s32>), [[COPY4]](<4 x s32>)
  ; X86-NEXT:   ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[ADD:%[0-9]+]]:_(<8 x s32>) = G_ADD [[CONCAT_VECTORS]], [[CONCAT_VECTORS2]]
  ; X86-NEXT:   [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[ADD]](<8 x s32>)
  ; X86-NEXT:   $xmm0 = COPY [[UV2]](<4 x s32>)
  ; X86-NEXT:   $xmm1 = COPY [[UV3]](<4 x s32>)
  ; X86-NEXT:   RET 0, implicit $xmm0, implicit $xmm1
  ; X64-LABEL: name: test_split_return_callee
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $xmm0, $xmm1, $xmm2, $xmm3
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
  ; X64-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
  ; X64-NEXT:   [[COPY2:%[0-9]+]]:_(<4 x s32>) = COPY $xmm2
  ; X64-NEXT:   [[COPY3:%[0-9]+]]:_(<4 x s32>) = COPY $xmm3
  ; X64-NEXT:   [[CONCAT_VECTORS1:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY2]](<4 x s32>), [[COPY3]](<4 x s32>)
  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<8 x s32>)
  ; X64-NEXT:   $xmm0 = COPY [[UV]](<4 x s32>)
  ; X64-NEXT:   $xmm1 = COPY [[UV1]](<4 x s32>)
  ; X64-NEXT:   CALL64pcrel32 @split_return_callee, csr_64, implicit $rsp, implicit $ssp, implicit $xmm0, implicit $xmm1, implicit-def $xmm0, implicit-def $xmm1
  ; X64-NEXT:   [[COPY4:%[0-9]+]]:_(<4 x s32>) = COPY $xmm0
  ; X64-NEXT:   [[COPY5:%[0-9]+]]:_(<4 x s32>) = COPY $xmm1
  ; X64-NEXT:   [[CONCAT_VECTORS2:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY4]](<4 x s32>), [[COPY5]](<4 x s32>)
  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   [[ADD:%[0-9]+]]:_(<8 x s32>) = G_ADD [[CONCAT_VECTORS]], [[CONCAT_VECTORS2]]
  ; X64-NEXT:   [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[ADD]](<8 x s32>)
  ; X64-NEXT:   $xmm0 = COPY [[UV2]](<4 x s32>)
  ; X64-NEXT:   $xmm1 = COPY [[UV3]](<4 x s32>)
  ; X64-NEXT:   RET 0, implicit $xmm0, implicit $xmm1
  ; Only %arg2 is forwarded to the callee; %arg1 is consumed after the call.
  %call = call <8 x i32> @split_return_callee(<8 x i32> %arg2)
  %r = add <8 x i32> %arg1, %call
  ret  <8 x i32> %r
}

; Checks an indirect call through a function-pointer argument. The pointer is
; loaded from fixed-stack slot 0 on i386 and copied from $rdi on x86-64. In
; both runs the resulting virtual register carries a register class (gr32 /
; gr64) instead of the generic `_`, and is used directly as the operand of
; CALL32r / CALL64r. The call passes no arguments, so every call-frame
; adjustment amount is zero.
define void @test_indirect_call(ptr %func) {
  ; X86-LABEL: name: test_indirect_call
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:gr32(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16)
  ; X86-NEXT:   ADJCALLSTACKDOWN32 0, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   CALL32r [[LOAD]](p0), csr_32, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKUP32 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   RET 0
  ; X64-LABEL: name: test_indirect_call
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $rdi
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:gr64(p0) = COPY $rdi
  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   CALL64r [[COPY]](p0), csr_64, implicit $rsp, implicit $ssp
  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   RET 0
  call void %func()
  ret void
}


; Checks how an i8 call argument is widened to s32 depending on its extension
; attribute: no attribute yields G_ANYEXT, signext yields G_SEXT and zeroext
; yields G_ZEXT. The same loaded i8 feeds all three calls. On i386 the widened
; value is stored at $esp + 0 inside a 4-byte call frame, with the offset
; constant shared by the three calls; on x86-64 it is copied into $edi.
declare void @take_char(i8)
define void @test_abi_exts_call(ptr %addr) {
  ; X86-LABEL: name: test_abi_exts_call
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.0, align 16)
  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p0) :: (load (s8) from %ir.addr)
  ; X86-NEXT:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
  ; X86-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD1]](s8)
  ; X86-NEXT:   G_STORE [[ANYEXT]](s32), [[PTR_ADD]](p0) :: (store (s8) into stack)
  ; X86-NEXT:   CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32)
  ; X86-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8)
  ; X86-NEXT:   G_STORE [[SEXT]](s32), [[PTR_ADD1]](p0) :: (store (s8) into stack)
  ; X86-NEXT:   CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[COPY2:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY2]], [[C]](s32)
  ; X86-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8)
  ; X86-NEXT:   G_STORE [[ZEXT]](s32), [[PTR_ADD2]](p0) :: (store (s8) into stack)
  ; X86-NEXT:   CALLpcrel32 @take_char, csr_32, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   RET 0
  ; X64-LABEL: name: test_abi_exts_call
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $rdi
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p0) :: (load (s8) from %ir.addr)
  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8)
  ; X64-NEXT:   $edi = COPY [[ANYEXT]](s32)
  ; X64-NEXT:   CALL64pcrel32 @take_char, csr_64, implicit $rsp, implicit $ssp, implicit $edi
  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8)
  ; X64-NEXT:   $edi = COPY [[SEXT]](s32)
  ; X64-NEXT:   CALL64pcrel32 @take_char, csr_64, implicit $rsp, implicit $ssp, implicit $edi
  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8)
  ; X64-NEXT:   $edi = COPY [[ZEXT]](s32)
  ; X64-NEXT:   CALL64pcrel32 @take_char, csr_64, implicit $rsp, implicit $ssp, implicit $edi
  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   RET 0
  %val = load i8, ptr %addr
  ; The three calls differ only in the extension attribute on the argument.
  call void @take_char(i8 %val)
  call void @take_char(i8 signext %val)
  call void @take_char(i8 zeroext %val)
  ret void
}

; Checks a variadic call that passes a pointer and an i32. On i386 both values
; are stored to the outgoing stack area at offsets 0 and 4 of an 8-byte call
; frame. On x86-64 they travel in $rdi and $esi, and $al is set to 0 before
; the call because no vector register carries an argument.
declare void @variadic_callee(ptr, ...)
define void @test_variadic_call_1(ptr %addr_ptr, ptr %val_ptr) {
  ; X86-LABEL: name: test_variadic_call_1
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1, align 16)
  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load (p0) from %ir.addr_ptr)
  ; X86-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[LOAD1]](p0) :: (load (s32) from %ir.val_ptr)
  ; X86-NEXT:   ADJCALLSTACKDOWN32 8, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
  ; X86-NEXT:   G_STORE [[LOAD2]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; X86-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
  ; X86-NEXT:   G_STORE [[LOAD3]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4, align 1)
  ; X86-NEXT:   CALLpcrel32 @variadic_callee, csr_32, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKUP32 8, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   RET 0
  ; X64-LABEL: name: test_variadic_call_1
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $rdi, $rsi
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi
  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0) from %ir.addr_ptr)
  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p0) :: (load (s32) from %ir.val_ptr)
  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   $rdi = COPY [[LOAD]](p0)
  ; X64-NEXT:   $esi = COPY [[LOAD1]](s32)
  ; X64-NEXT:   $al = MOV8ri 0
  ; X64-NEXT:   CALL64pcrel32 @variadic_callee, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit $al
  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   RET 0
  %addr = load ptr, ptr %addr_ptr
  %val = load i32, ptr %val_ptr
  call void (ptr, ...) @variadic_callee(ptr %addr, i32 %val)
  ret void
}

; Variant of test_variadic_call_1 in which the variadic argument is a double.
; On i386 it is stored as an s64 at stack offset 4 of a 12-byte call frame.
; On x86-64 it is passed in $xmm0 and $al is set to 1, matching the single
; vector register used for arguments.
define void @test_variadic_call_2(ptr %addr_ptr, ptr %val_ptr) {
  ; X86-LABEL: name: test_variadic_call_2
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1, align 16)
  ; X86-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0)
  ; X86-NEXT:   [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load (p0) from %ir.addr_ptr)
  ; X86-NEXT:   [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[LOAD1]](p0) :: (load (s64) from %ir.val_ptr, align 4)
  ; X86-NEXT:   ADJCALLSTACKDOWN32 12, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32)
  ; X86-NEXT:   G_STORE [[LOAD2]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
  ; X86-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
  ; X86-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s32)
  ; X86-NEXT:   G_STORE [[LOAD3]](s64), [[PTR_ADD1]](p0) :: (store (s64) into stack + 4, align 1)
  ; X86-NEXT:   CALLpcrel32 @variadic_callee, csr_32, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKUP32 12, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   RET 0
  ; X64-LABEL: name: test_variadic_call_2
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $rdi, $rsi
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
  ; X64-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $rsi
  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[COPY]](p0) :: (load (p0) from %ir.addr_ptr)
  ; X64-NEXT:   [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.val_ptr)
  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   $rdi = COPY [[LOAD]](p0)
  ; X64-NEXT:   $xmm0 = COPY [[LOAD1]](s64)
  ; X64-NEXT:   $al = MOV8ri 1
  ; X64-NEXT:   CALL64pcrel32 @variadic_callee, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $xmm0, implicit $al
  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   RET 0
  %addr = load ptr, ptr %addr_ptr
  %val = load double, ptr %val_ptr
  call void (ptr, ...) @variadic_callee(ptr %addr, double %val)
  ret void
}

; Return value is in memory unless subtarget is AVX or higher.
define <32 x float> @test_return_v32f32() {
  ; X86-LABEL: name: test_return_v32f32
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0, align 16)
  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
  ; X86-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
  ; X86-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[LOAD]](p0) :: (store (<32 x s32>))
  ; X86-NEXT:   $eax = COPY [[LOAD]](p0)
  ; X86-NEXT:   RET 0
  ; X64-LABEL: name: test_return_v32f32
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   liveins: $rdi
  ; X64-NEXT: {{  $}}
  ; X64-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $rdi
  ; X64-NEXT:   [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
  ; X64-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
  ; X64-NEXT:   G_STORE [[BUILD_VECTOR]](<32 x s32>), [[COPY]](p0) :: (store (<32 x s32>))
  ; X64-NEXT:   $rax = COPY [[COPY]](p0)
  ; X64-NEXT:   RET 0
  ret <32 x float> zeroinitializer
}

; Caller-side counterpart of test_return_v32f32. Although the IR call has no
; arguments, the address of a local stack object (%stack.0) is handed to the
; callee: stored at $esp + 0 in a 4-byte call frame on i386, and copied into
; $rdi on x86-64. After the call the <32 x s32> result is reloaded from
; %stack.0 and element 7 is extracted. The float is returned in $fp0 on i386
; and in $xmm0 on x86-64. The extract index constant is s32 on i386 but s64
; on x86-64.
define float @test_call_v32f32() {
  ; X86-LABEL: name: test_call_v32f32
  ; X86: bb.1 (%ir-block.0):
  ; X86-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
  ; X86-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
  ; X86-NEXT:   ADJCALLSTACKDOWN32 4, 0, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $esp
  ; X86-NEXT:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
  ; X86-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s32)
  ; X86-NEXT:   G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
  ; X86-NEXT:   CALLpcrel32 @test_return_v32f32, csr_32, implicit $esp, implicit $ssp
  ; X86-NEXT:   ADJCALLSTACKUP32 4, 0, implicit-def $esp, implicit-def $eflags, implicit-def $ssp, implicit $esp, implicit $ssp
  ; X86-NEXT:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (load (<32 x s32>) from %stack.0)
  ; X86-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<32 x s32>), [[C]](s32)
  ; X86-NEXT:   $fp0 = COPY [[EVEC]](s32)
  ; X86-NEXT:   RET 0, implicit $fp0
  ; X64-LABEL: name: test_call_v32f32
  ; X64: bb.1 (%ir-block.0):
  ; X64-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
  ; X64-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
  ; X64-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   $rdi = COPY [[FRAME_INDEX]](p0)
  ; X64-NEXT:   CALL64pcrel32 @test_return_v32f32, csr_64, implicit $rsp, implicit $ssp, implicit $rdi
  ; X64-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def $rsp, implicit-def $eflags, implicit-def $ssp, implicit $rsp, implicit $ssp
  ; X64-NEXT:   [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[FRAME_INDEX]](p0) :: (load (<32 x s32>) from %stack.0)
  ; X64-NEXT:   [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<32 x s32>), [[C]](s64)
  ; X64-NEXT:   $xmm0 = COPY [[EVEC]](s32)
  ; X64-NEXT:   RET 0, implicit $xmm0
  %vect = call <32 x float> @test_return_v32f32()
  %elt = extractelement <32 x float> %vect, i32 7
  ret float %elt
}
